###################################################################################
#_______________________________ Interval Activity _______________________________#
###################################################################################

# This script produces tables that contain the number of time intervals that experience
# activity (trades, quotes, quotes that change the NBBO) for certain symbols
# and dates. This script produces these tables from the data "symbol_interval_activity_2018.csv"
# in the "WRDS/Output" folder.
#
# The "symbol_interval_activity.csv" dataset contains the number of time intervals
# that experience activity for symbol-interval-date-activity categories level information.
# An example would be the number of milliseconds (interval) of trading (activity category)
# that occurred for SPY (symbol) on 02/06/2018 (date).
#
# This script first collpases the symbol-interval-date-activity categories
# to a symbol-interval-activity categories level dataset by averaging the
# activity counts over the trading days.
#
# Then, the script iterates through the symbols and produces a table
# with the rows containing the activity categories and the columns containing
# the intervals. It also calculates other activity numbers
# such as the number of intervals without activity, and the percent of intervals
# without activity. However, these numbers are actually computed directly
# in the excel spreadsheet "Interval_Activity_2018.csv", for the purpose of
# transparency.

################################################################################
#_______________________________ Load Libraries _______________________________#
################################################################################

library(data.table)
library(plyr)
library(dplyr)
library(reshape2)

#############################################################################
#_______________________________ Directories _______________________________#
#############################################################################

home.dir <- "path-to-data-appendix/3_Analysis_of_TAQ_data"
data.dir <- file.path(home.dir, 'WRDS_Server/Output')
fig.dir <- file.path(home.dir, 'Final_Output/Figures')
table.dir <- file.path(home.dir, 'Final_Output/Tables/interval_activity')

##################################################################################
#_______________________________ Hardcoded Values _______________________________#
##################################################################################

# Calculate the number of seconds in a trading day
num_hours_in_trading_day  <- 6.5
num_mins_in_hour          <- 60
num_secs_in_min           <- 60
num_secs_in_trading_day   <- num_hours_in_trading_day * num_mins_in_hour * num_secs_in_min

# Conversion between "interval_length" and "interval_name"
# Interval length in the data is the number of intervals in a second (1ms is 1000, 10s is 0.1)
# We convert to seconds by taking 1/interval_length
interval_length         <- c(0.1, 1, 10, 100, 1000, 10000)
interval_length_in_sec  <- 1 / interval_length
interval_name           <- c("10s", "1s", "100ms", "10ms", "1ms", "100micros")

###############################################################################
#_______________________________ Create Tables _______________________________#
###############################################################################

# Read in interval activity
sym_date_interval_activity <- data.table::fread(file.path(data.dir, "symbol_interval_activity_2018.csv"))

# Collapse to symbol-interval level:
# We average the activity across the dates.
sym_interval_activity <- sym_date_interval_activity %>%
                         dplyr::group_by(sym_root, sym_suffix, interval_length, type) %>%
                         dplyr::summarise(num_intervals = mean(num_intervals))

# Create symbol variable by concatenating symbol root and suffix
sym_interval_activity$symbol <- ifelse(is.na(sym_interval_activity$sym_suffix),
                                       sym_interval_activity$sym_root,
                                       paste(sym_interval_activity$sym_root, sym_interval_activity$sym_suffix))

# Create column with interval_length_in_sec, which is the "interval length"
# converted in terms of seconds (1ms is 0.001s, for example).
sym_interval_activity$interval_length_in_sec <- plyr::mapvalues(sym_interval_activity$interval_length, from = interval_length, to = interval_length_in_sec)
# Create variable containing the interval name.
sym_interval_activity$interval_name <- plyr::mapvalues(sym_interval_activity$interval_length_in_sec, from = interval_length_in_sec, to = interval_name)

# Extract symbols
symbols <- unique(sym_interval_activity$symbol)

# Loop over symbols
for (sym in symbols) {

    # Extract data and columns for the symbol
    sym_activity <- sym_interval_activity[sym_interval_activity$symbol == sym,]
    sym_activity <- sym_activity[,c("symbol", "interval_length_in_sec", "interval_name", "type", "num_intervals")]

    # Reshape (rows are interval lengths, columns are activity categories)
    sym_activity_by_interval <- dcast(sym_activity, symbol + interval_name + interval_length_in_sec ~ type, value.var = "num_intervals")

    # Order by length of interval (decreasing)
    sym_activity_by_interval <- sym_activity_by_interval[order(-sym_activity_by_interval$interval_length_in_sec),]

    # Calculate the total number of intervals in a full trading day
    # We divide the number of seconds in a trading day by the interval length (in seconds)
    # to obtain the number of intervals in a full trading day
    sym_activity_by_interval$total_intervals <- num_secs_in_trading_day * 1 / sym_activity_by_interval$interval_length_in_sec

    # Reorder columns
    sym_activity_by_interval <- sym_activity_by_interval[,c('symbol', 'interval_name', 'interval_length_in_sec', 'intvl_quote', 'intvl_trade',
                                                            'intvl_nbbo_change', 'intvl_trade_or_quote', 'intvl_trade_or_nbbo_change', 'total_intervals')]

    # Number of intervals of no activity = (Total intervals in trading day) - (Number of intervals with activity)
    sym_activity_by_interval$intvl_no_quote                  <- sym_activity_by_interval$total_intervals - sym_activity_by_interval$intvl_quote
    sym_activity_by_interval$intvl_no_trade                  <- sym_activity_by_interval$total_intervals - sym_activity_by_interval$intvl_trade
    sym_activity_by_interval$intvl_no_nbbo_change            <- sym_activity_by_interval$total_intervals - sym_activity_by_interval$intvl_nbbo_change
    sym_activity_by_interval$intvl_no_trade_or_quote         <- sym_activity_by_interval$total_intervals - sym_activity_by_interval$intvl_trade_or_quote
    sym_activity_by_interval$intvl_no_trade_or_nbbo_change   <- sym_activity_by_interval$total_intervals - sym_activity_by_interval$intvl_trade_or_nbbo_change

    # Number of intervals of no activity = (Total intervals in trading day) - (Number of intervals with activity)
    sym_activity_by_interval$pct_no_quote                    <- sym_activity_by_interval$intvl_no_quote                 /  sym_activity_by_interval$total_intervals
    sym_activity_by_interval$pct_no_trade                    <- sym_activity_by_interval$intvl_no_trade                 /  sym_activity_by_interval$total_intervals
    sym_activity_by_interval$pct_no_nbbo_change              <- sym_activity_by_interval$intvl_no_nbbo_change           /  sym_activity_by_interval$total_intervals
    sym_activity_by_interval$pct_no_trade_or_quote           <- sym_activity_by_interval$intvl_no_trade_or_quote        /  sym_activity_by_interval$total_intervals
    sym_activity_by_interval$pct_no_trade_or_nbbo_change     <- sym_activity_by_interval$intvl_no_trade_or_nbbo_change  /  sym_activity_by_interval$total_intervals

    # Transpose so that rows are activity categories and columns are intervals.
    sym_activity_by_type <- t(sym_activity_by_interval)
    sym_activity_by_type <- data.frame(sym_activity_by_type)
    rownames(sym_activity_by_type)

    # Export
    sym_file_name <- paste0(sym, "_interval_activity.csv")
    data.table::fwrite(sym_activity_by_type, file.path(table.dir, sym_file_name), row.names = TRUE)
}

# SPY Highest Volume Day
spy_feb_6_2018 <- sym_date_interval_activity[sym_date_interval_activity$sym_root == "SPY" &
                                             sym_date_interval_activity$date == "20180206",]

# Calculate interval length in seconds and the name of the interval
spy_feb_6_2018$interval_length_in_sec <- plyr::mapvalues(spy_feb_6_2018$interval_length, from = interval_length, to = interval_length_in_sec)
spy_feb_6_2018$interval_name <- plyr::mapvalues(spy_feb_6_2018$interval_length_in_sec, from = interval_length_in_sec, to = interval_name)

# Extract select variables
spy_feb_6_2018 <- spy_feb_6_2018[,c("sym_root", "interval_length_in_sec", "interval_name", "type", "num_intervals")]

# Reshape (rows are interval lengths, columns are activity categories)
spy_feb_6_2018_activity <- dcast(spy_feb_6_2018, sym_root + interval_name + interval_length_in_sec ~ type, value.var = "num_intervals")

# Reorder columns
spy_feb_6_2018_activity <- spy_feb_6_2018_activity[,c('sym_root', 'interval_name', 'interval_length_in_sec', 'intvl_quote', 'intvl_trade',  'intvl_nbbo_change', 'intvl_trade_or_quote', 'intvl_trade_or_nbbo_change')]

# Order by length of interval (decreasing)
spy_feb_6_2018_activity <- spy_feb_6_2018_activity[order(-spy_feb_6_2018_activity$interval_length_in_sec),]

# Transpose so rows are activity categories and columns are intervals.
spy_feb_6_2018_activity_type <- t(spy_feb_6_2018_activity)
spy_feb_6_2018_activity_type <- data.frame(spy_feb_6_2018_activity_type)

# Export
data.table::fwrite(spy_feb_6_2018_activity_type, file.path(table.dir, "spy_highest_volume_day_activity.csv"), row.names = TRUE)
